{
 "cells": [
  {
   "cell_type": "code",
   "id": "initial_id",
   "metadata": {
    "collapsed": true,
    "ExecuteTime": {
     "end_time": "2025-04-10T20:40:48.777282Z",
     "start_time": "2025-04-10T20:40:48.529580Z"
    }
   },
   "source": [
    "from kalpy.decoder.data import FstArchive\n",
    "from kalpy.gmm.align import GmmAligner\n",
    "from kalpy.feat.data import FeatureArchive\n",
    "from kalpy.utils import generate_read_specifier\n",
    "from _kalpy.util import RandomAccessInt32VectorVectorReader\n",
    "from _kalpy.gmm import gmm_align_compiled, gmm_align_reference_phones\n",
    "from _kalpy.gmm import DecodableAmDiagGmmScaledMasked"
   ],
   "outputs": [],
   "execution_count": 1
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:40:49.283321Z",
     "start_time": "2025-04-10T20:40:49.281135Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Utterance under inspection and the MFA working directories that hold its inputs.\n",
    "# NOTE(review): these are machine-specific absolute paths; edit mfa_root to relocate them.\n",
    "utterance_id = \"540-37852\"\n",
    "mfa_root = r\"D:\\temp\\MFA\\czech\"\n",
    "split_dir = mfa_root + r\"\\czech\\split10\"\n",
    "align_dir = mfa_root + r\"\\sat_4_ali\"\n",
    "archive_suffix = \"1.7\"  # shared by the ref_phones / fsts / feats file names below\n",
    "\n",
    "reference_phones_path = split_dir + r\"\\ref_phones.\" + archive_suffix + \".ark\"\n",
    "fst_path = align_dir + r\"\\fsts.\" + archive_suffix + \".ark\"\n",
    "feat_path = split_dir + r\"\\feats.\" + archive_suffix + \".scp\"\n",
    "lda_mat_path = align_dir + r\"\\lda.mat\"\n",
    "model_path = align_dir + r\"\\final.alimdl\""
   ],
   "id": "20c452aca6fc4b1e",
   "outputs": [],
   "execution_count": 2
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:42:29.156949Z",
     "start_time": "2025-04-10T20:42:23.253791Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Read the reference phones stored for the utterance, keeping the first element of each entry.\n",
    "reference_phone_archive = RandomAccessInt32VectorVectorReader(generate_read_specifier(reference_phones_path))\n",
    "reference_phones = [x[0] for x in reference_phone_archive.Value(utterance_id)]\n",
    "if not reference_phones:\n",
    "    # Fail with a clear message instead of a bare IndexError on the assignment below.\n",
    "    raise ValueError(\"No reference phones found for utterance \" + utterance_id)\n",
    "# The first entry is overwritten with -1.\n",
    "# NOTE(review): -1 presumably means \"no phone constraint\" for that frame -- confirm against the masked decodable.\n",
    "reference_phones[0] = -1\n",
    "\n",
    "# Features are read through FeatureArchive with the LDA matrix file supplied.\n",
    "feature_archive = FeatureArchive(\n",
    "    feat_path,\n",
    "    lda_mat_file_name=lda_mat_path,\n",
    ")\n",
    "training_graph_archive = FstArchive(fst_path)\n",
    "\n",
    "training_graph = training_graph_archive[utterance_id]\n",
    "features = feature_archive[utterance_id]\n",
    "\n",
    "aligner = GmmAligner(model_path)\n",
    "\n",
    "print(len(reference_phones))\n",
    "print(features.NumRows())\n",
    "# NOTE(review): assumes one reference phone per feature row, as the saved output (324 / 324) suggests.\n",
    "assert len(reference_phones) == features.NumRows(), \"reference phones and feature rows differ in length\""
   ],
   "id": "4fe37da9ca524207",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "324\n",
      "324\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "# Align through GmmAligner.align_utterance, then convert every entry of the result's\n",
    "# .alignment sequence with the transition model's TransitionIdToPhone.\n",
    "alignment = aligner.align_utterance(\n",
    "    training_graph,\n",
    "    features,\n",
    "    utterance_id,\n",
    "    reference_phones,\n",
    ")\n",
    "alignment_phones = list(map(aligner.transition_model.TransitionIdToPhone, alignment.alignment))"
   ],
   "id": "71fa0d469fbc98af",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:42:03.732831Z",
     "start_time": "2025-04-10T20:42:03.722827Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Call the low-level binding directly, passing the scales and beams configured on `aligner`.\n",
    "# NOTE(review): this rebinds `alignment` (here a plain sequence, not the object returned by\n",
    "# align_utterance) and `alignment_phones`, so the two alignment cells overwrite each other.\n",
    "(\n",
    "    alignment,\n",
    "    words,\n",
    "    likelihood,\n",
    "    per_frame_log_likelihoods,\n",
    "    successful,\n",
    "    retried,\n",
    ") = gmm_align_reference_phones(\n",
    "    aligner.transition_model,\n",
    "    aligner.acoustic_model,\n",
    "    training_graph,\n",
    "    features,\n",
    "    reference_phones,\n",
    "    acoustic_scale=aligner.acoustic_scale,\n",
    "    transition_scale=aligner.transition_scale,\n",
    "    self_loop_scale=aligner.self_loop_scale,\n",
    "    beam=aligner.beam,\n",
    "    retry_beam=aligner.retry_beam,\n",
    "    careful=aligner.careful,\n",
    ")\n",
    "# Surface a failed run instead of silently continuing with an empty alignment.\n",
    "if not successful or len(alignment) == 0:\n",
    "    print(\"WARNING: alignment failed for\", utterance_id, \"(successful=%s, retried=%s)\" % (successful, retried))\n",
    "print(alignment[:10])\n",
    "alignment_phones = [aligner.transition_model.TransitionIdToPhone(x) for x in alignment]"
   ],
   "id": "c225d8800695e673",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:41:00.656228Z",
     "start_time": "2025-04-10T20:41:00.653252Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Build the masked decodable from the aligner's models, the features and the reference phones.\n",
    "# NOTE(review): the trailing 1.0 is presumably the acoustic scale -- confirm against the binding's signature.\n",
    "decodable = DecodableAmDiagGmmScaledMasked(\n",
    "    aligner.acoustic_model,\n",
    "    aligner.transition_model,\n",
    "    features,\n",
    "    reference_phones,\n",
    "    1.0,\n",
    ")"
   ],
   "id": "1377ef3717b0ef5d",
   "outputs": [],
   "execution_count": 4
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:41:42.641759Z",
     "start_time": "2025-04-10T20:41:42.637568Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Query a single log-likelihood from the masked decodable and print it.\n",
    "# NOTE(review): the arguments are presumably (frame, transition id), as in Kaldi's\n",
    "# DecodableInterface -- confirm against the binding.\n",
    "like = decodable.LogLikelihood(\n",
    "    1,\n",
    "    2,\n",
    ")\n",
    "print(like)"
   ],
   "id": "6055811373db0a0e",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-1000000.0\n"
     ]
    }
   ],
   "execution_count": 7
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "",
   "id": "1818b0c8f25bdb46",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:42:01.206122Z",
     "start_time": "2025-04-10T20:42:01.204096Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Rebind reference_phones to a list of -1, one per entry stored for the utterance.\n",
    "# NOTE(review): this overwrites the list that the alignment and decodable cells read,\n",
    "# so their results depend on the order in which cells are run.\n",
    "reference_phones = [-1 for _ in reference_phone_archive.Value(utterance_id)]"
   ],
   "id": "a540a11da19d899e",
   "outputs": [],
   "execution_count": 9
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-04-10T20:41:08.073752Z",
     "start_time": "2025-04-10T20:41:08.070240Z"
    }
   },
   "cell_type": "code",
   "source": [
    "# Show the reference phones as [phone, run length] pairs instead of one value per frame,\n",
    "# which keeps the full sequence readable without a wall of repeated integers.\n",
    "phone_runs = []\n",
    "for phone in reference_phones:\n",
    "    if phone_runs and phone_runs[-1][0] == phone:\n",
    "        # Same phone as the previous entry: extend the current run.\n",
    "        phone_runs[-1][1] += 1\n",
    "    else:\n",
    "        phone_runs.append([phone, 1])\n",
    "print(len(reference_phones), \"entries:\", phone_runs)"
   ],
   "id": "777c9380521b4adc",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "",
   "id": "7ac0064bed7fd1b5",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
